package algorithms;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import datastructures.DicHashTreeNode;

/**
 * @author Yuval Kesten <blockquote> Dynamic Item Set Counting implemetation.
 *         Based upon the paper by <b>Sergey Brin,Rajeev Motwani,Jeffrey D.
 *         Ullman,Shalom Tsur</b> - <strong>Dynamic itemset counting and
 *         implication rules for market basket data (1997)</strong>
 *         </blockquote>
 */
public class Dic extends Algo {

	protected DicHashTreeNode hashTreeRoot;
	protected int M;
	protected int lastIter = Integer.MAX_VALUE;
	protected int estimatedNumOfBaskets;
	protected Map<Integer, Long> kDiscoveryIndices;
	protected int lastKWithDiscoveries;
	protected int maxBasketSize;

	public Dic(String pathFormat, int startIndex, int endIndex, int _M, Set<String> alphabetSet,
			int _estimatedNumOfBaskets) throws Exception {
		super(pathFormat, startIndex, endIndex, alphabetSet);
		hashTreeRoot = new DicHashTreeNode(0, 0);
		root = hashTreeRoot;
		estimatedNumOfBaskets = _estimatedNumOfBaskets;
		kDiscoveryIndices = new HashMap<Integer, Long>();
		kDiscoveryIndices.put(0, 1L);
		kDiscoveryIndices.put(1, 1L);

		// Small inputs work around
		// If M is greater than the input size we will set it to the input size
		// and then the Algorithm will run like Apriori
		M = _M < estimatedNumOfBaskets ? _M : estimatedNumOfBaskets;
	}

	protected void init() {
		hashTreeRoot.depth = 0;
		hashTreeRoot.set = null;
		lastKWithDiscoveries = 0;
		maxBasketSize = 0;
	}

	protected void startOfSingleIteration() {
		foundSomething = false;
		iterNum++;
	}

	public boolean endOfSingleIteration(double support, long basketIndex, boolean foundSomething) {
		estimatedNumOfBaskets = numOfBaskets;

		return true;
	}

	protected void pruneAndDiscover(double support) {
		//long start = System.currentTimeMillis();
		hashTreeRoot.pruneAtDepth(this, support, estimatedNumOfBaskets, k, basketIndex);
		//long end = System.currentTimeMillis();
		//long pruneTime = end - start;
		if (iterNum == 1 || k < maxBasketSize) {
			k++;
			kDiscoveryIndices.put(k, basketIndex);
		}
		//start = System.currentTimeMillis();
		foundSomething = (hashTreeRoot.discoverTillDepth(this, support, estimatedNumOfBaskets, k, basketIndex));
		//end = System.currentTimeMillis();
		//long dscoverTime = end - start;
		//System.out.println("pruneAndDiscover: " + basketIndex + "prune time: "+ pruneTime + " discoer time: "+dscoverTime);
	}

	/**
	 * Return the threshold (natural number) which an Item must appear more than
	 * to be considered as common. Since during the first iteration we don't
	 * know the number of baskets in our run we will return 0 so no item will be
	 * missed
	 * 
	 * @param support
	 *            - The relative support
	 * @return - The support as integer
	 */
	public int getSupportAsInt(double support, long discoveryIndex) {
		return (int) Math.floor(support * estimatedNumOfBaskets);
	}

	protected boolean processSingleLine(String line, double support) {
		String[] tmpBasket = Common.parseLineToBasket(line);

		if (iterNum == 1) {// Collect some stats
			numOfBaskets++;
			int basketSize = tmpBasket.length;
			numOfItems += basketSize;
			if (basketSize > maxBasketSize) {
				maxBasketSize = basketSize;
			}
		}

		handleBasket(tmpBasket, k, hashTreeRoot, basketIndex, support);
		if (basketIndex % M == 0) {
			pruneAndDiscover(support);
			if (!foundSomething) {
				long prevKDiscoveryIndex = kDiscoveryIndices.get(lastKWithDiscoveries);
				if (basketIndex - prevKDiscoveryIndex > estimatedNumOfBaskets) {
					return false;
				}
			} else {
				lastKWithDiscoveries = k;
			}
		}
		return true;
	}

	protected void handleBasket(String[] tmpBasket, int k, DicHashTreeNode root, long basketIndex, double support) {
		Arrays.sort(tmpBasket);
		int subsetsSize = k > tmpBasket.length ? tmpBasket.length : k;
		for (int i = 1; i <= subsetsSize; i++) {
			if (estimatedNumOfBaskets == Integer.MAX_VALUE
					|| basketIndex - kDiscoveryIndices.get(i) <= 2 * estimatedNumOfBaskets) {
				handleOneBasketPhase(tmpBasket, i, root, basketIndex, support);
			}
		}
	}

	protected void handleOneBasketPhase(String[] tmpBasket, int subsetsSize, DicHashTreeNode root, long basketIndex,
			double support) {
		List<List<String>> subsets = Common.getAllSubsets(tmpBasket, subsetsSize, getAlphabet());
		String word;
		if (subsets == null)
			return;
		DicHashTreeNode traverser;
		for (List<String> list : subsets) {
			traverser = root;
			if (traverser.childrens == null)
				continue;
			// assert(k==list.size());
			for (int i = 0; i < subsetsSize; i++) {
				word = list.get(i);
				if (traverser.childrens.containsKey(word)) {
					traverser = traverser.childrens.get(word);
					if (i == subsetsSize - 1) {
						traverser.encounterNode(basketIndex, estimatedNumOfBaskets,
								getSupportAsInt(support, traverser.discoveryIndex), iterNum);
					}
				} else if (i == 0) {
					Algo.incStats(1);
					traverser = new DicHashTreeNode(1, basketIndex, word, root, iterNum, estimatedNumOfBaskets, support);
				} else {
					break;
				}
			}
		}
	}
}
